#Andrew Gooch
#April 11, 2017
#Ripping Yarn, Political Communications
#Replication R file 5: weighting data from second experiment

#This R file recodes treatment variables and demographic variables, and then uses the demos to weight the data using the survey package.

#required packages
library(arm)
library(ggplot2)
library(grid)
library(gridExtra)
library(foreign)
library(survey)


######################################
#MAY
#######################################

#loads data. Please change the path below to the directory where the data file is located on your machine
May15 <- read.dta("C:/Users/agooch/Dropbox/Working papers/Ripping Yarn/Data/515_12.dta")
#NOTE: attach(May15) was removed. Every statement in this file refers to columns as
#May15$..., so the attached copy was never used. attach() places a copy of the data
#frame on the search path, so it would not have reflected any of the recodes below,
#it was never detached, and it could mask other objects that share a column's name.

#lists the variables available in the raw data
names(May15)

#ControlDummy: 1 = control, 0 = cue or anonymous story
table(May15$Gtreatment)
#numeric treatment code: 1 = control, 2 = cue, 3 = anonymous story
May15$argnum <- as.numeric(May15$Gtreatment)
table(May15$argnum)
#code 1 already equals the dummy value 1, so only codes 2 and 3 are rewritten;
#any other code (and NA) is carried over unchanged
May15$ControlDummy <- replace(May15$argnum, May15$argnum %in% c(2, 3), 0)

#race indicators built from Grace: code 1 = white, 2 = black, 3 = hispanic; code 4 is 0 on all three
#each indicator starts as a copy of Grace, so values outside 1-4 (and NA) pass through unchanged

#AA
table(May15$Grace)
May15$black <- replace(May15$Grace, May15$Grace %in% c(1, 3, 4), 0)
May15$black[May15$Grace %in% 2] <- 1 #black
table(May15$black, May15$Grace)


#white
table(May15$Grace)
May15$white <- replace(May15$Grace, May15$Grace %in% 2:4, 0)
May15$white[May15$Grace %in% 1] <- 1 #white
table(May15$white, May15$Grace)


#hispanic
table(May15$Grace)
May15$hispanic <- replace(May15$Grace, May15$Grace %in% c(1, 2, 4), 0)
May15$hispanic[May15$Grace %in% 3] <- 1 #hispanic
table(May15$hispanic, May15$Grace)



#female: Ggender code 2 becomes 1 (female), code 1 becomes 0; anything else is left as is
table(May15$Ggender)
May15$female <- replace(May15$Ggender, May15$Ggender %in% 1, 0)
May15$female[May15$Ggender %in% 2] <- 1 #female
#cross-tab to check the recode against the original variable
table(May15$female, May15$Ggender)


#CueDummy: 1 = cue, 0 = control or anonymous story
table(May15$Gtreatment)
#numeric treatment code: 1 = control, 2 = cue, 3 = anonymous story
May15$argnum <- as.numeric(May15$Gtreatment)
table(May15$argnum)
#codes outside 1-3 (and NA) are carried over unchanged
May15$CueDummy <- replace(May15$argnum, May15$argnum %in% c(1, 3), 0)
May15$CueDummy[May15$argnum %in% 2] <- 1 #cue


#AnonStoryDummy: 1 = anonymous story, 0 = control or cue
table(May15$Gtreatment)
#numeric treatment code: 1 = control, 2 = cue, 3 = anonymous story
May15$argnum <- as.numeric(May15$Gtreatment)
table(May15$argnum)
#codes outside 1-3 (and NA) are carried over unchanged
May15$AnonStoryDummy <- replace(May15$argnum, May15$argnum %in% c(1, 2), 0)
May15$AnonStoryDummy[May15$argnum %in% 3] <- 1 #anonymous story

#age collapsed from five Gage categories to three:
#  Gage 1 (18-24)                      -> 1
#  Gage 2, 3, 4 (25-34, 35-44, 45-64)  -> 2
#  Gage 5 (65+)                        -> 3
table(May15$Gage)
May15$Gage2 <- May15$Gage
May15$Gage2[May15$Gage %in% 1] <- 1 #18-24
May15$Gage2[May15$Gage %in% 2:4] <- 2 #25-64
May15$Gage2[May15$Gage %in% 5] <- 3 #65+
#cross-tab to check the collapsed categories against the original ones
table(May15$Gage2, May15$Gage)

#pid for newer question wording
#pid5_new: -1 = dem, 1 = rep, 0 = ind or other (despite the name it takes three values)
table(May15$Gpid3)
May15$pid5_new <- May15$Gpid3
May15$pid5_new[May15$Gpid3 %in% 1] <- -1 #dem
May15$pid5_new[May15$Gpid3 %in% 2] <- 1 #rep
May15$pid5_new[May15$Gpid3 %in% c(3, 4)] <- 0 #ind, other
table(May15$pid5_new)


#leaner_new: -1 = lean dem, 1 = lean rep, 0 = neither
table(May15$Gindlean)
May15$leaner_new <- May15$Gindlean
May15$leaner_new[May15$Gindlean %in% 1] <- -1 #lean dem
May15$leaner_new[May15$Gindlean %in% 2] <- 1 #lean rep
May15$leaner_new[May15$Gindlean %in% 3] <- 0 #neither
table(May15$leaner_new)

#3-point with leaners as partisans
#NOTE: the sign convention here is the reverse of pid5_new/leaner_new:
#PID3_lean_new is 1 = dem, 0 = ind, -1 = rep
#which() drops rows where the condition is NA, so those rows are skipped
#the three assignments run in this order (dem, ind, rep); a later one wins if a row matches more than one
May15$PID3_lean_new <- May15$leaner_new
May15$PID3_lean_new[which(May15$pid5_new == -1 | May15$leaner_new == -1)] <- 1 #dem
May15$PID3_lean_new[which(May15$leaner_new == 0)] <- 0 #ind
May15$PID3_lean_new[which(May15$pid5_new == 1 | May15$leaner_new == 1)] <- -1 #rep
#checks of the combined measure against its two inputs
table(May15$PID3_lean_new)
table(May15$PID3_lean_new, May15$pid5_new)
table(May15$PID3_lean_new, May15$leaner_new)

# SUPPORT

#SuppCont: Gcontrol rescaled to run from 0 (strong opp) to 1 (strong fav)
table(May15$Gcontrol)
May15$argnum <- as.numeric(May15$Gcontrol)
table(May15$argnum)
#codes 1-6 are looked up by position in the vector below:
#1 strong fav, 2 fav, 3 neither, 4 oppose, 5 strong opp, 6 dk (set to missing)
#any other code, and NA, is left as it was
May15$SuppCont <- May15$argnum
May15$SuppCont[May15$argnum %in% 1:6] <-
  c(1, 0.75, 0.5, 0.25, 0, NA)[May15$argnum[May15$argnum %in% 1:6]]
table(May15$SuppCont, May15$Gcontrol)


#SuppCue: Gcue rescaled to run from 0 (strong opp) to 1 (strong fav)
table(May15$Gcue)
May15$argnum <- as.numeric(May15$Gcue)
table(May15$argnum)
#codes 1-6 are looked up by position in the vector below:
#1 strong fav, 2 fav, 3 neither, 4 oppose, 5 strong opp, 6 dk (set to missing)
#any other code, and NA, is left as it was
May15$SuppCue <- May15$argnum
May15$SuppCue[May15$argnum %in% 1:6] <-
  c(1, 0.75, 0.5, 0.25, 0, NA)[May15$argnum[May15$argnum %in% 1:6]]
table(May15$SuppCue, May15$Gcue)

#SuppAnonStory: Ggoodjust rescaled to run from 0 (strong opp) to 1 (strong fav)
table(May15$Ggoodjust)
May15$argnum <- as.numeric(May15$Ggoodjust)
table(May15$argnum)
#codes 1-6 are looked up by position in the vector below:
#1 strong fav, 2 fav, 3 neither, 4 oppose, 5 strong opp, 6 dk (set to missing)
#any other code, and NA, is left as it was
May15$SuppAnonStory <- May15$argnum
May15$SuppAnonStory[May15$argnum %in% 1:6] <-
  c(1, 0.75, 0.5, 0.25, 0, NA)[May15$argnum[May15$argnum %in% 1:6]]
table(May15$SuppAnonStory, May15$Ggoodjust)

#BidenFavor: Gfavor rescaled to run from 0 (strong opp) to 1 (strong fav)
table(May15$Gfavor)
May15$argnum <- as.numeric(May15$Gfavor)
table(May15$argnum)
#codes 1-6 are looked up by position in the vector below:
#1 strong fav, 2 fav, 3 neither, 4 oppose, 5 strong opp, 6 dk (set to missing)
#any other code, and NA, is left as it was
May15$BidenFavor <- May15$argnum
May15$BidenFavor[May15$argnum %in% 1:6] <-
  c(1, 0.75, 0.5, 0.25, 0, NA)[May15$argnum[May15$argnum %in% 1:6]]
table(May15$BidenFavor, May15$Gfavor)


#BidenFavor2: binary version of Gfavor
#1 = strong fav or fav; 0 = neither, oppose, strong opp or dk
#NOTE: dk is coded 0 here, whereas BidenFavor codes dk as NA
table(May15$Gfavor)
May15$argnum <- as.numeric(May15$Gfavor)
table(May15$argnum)
#code 1 already equals 1, so only code 2 needs to be raised to 1;
#codes outside 1-6 (and NA) are carried over unchanged
May15$BidenFavor2 <- replace(May15$argnum, May15$argnum %in% 2, 1)
May15$BidenFavor2[May15$argnum %in% 3:6] <- 0
table(May15$BidenFavor2, May15$Gfavor)




#####
#Weighting
#####

#creates the unweighted survey design object that is raked further down
#ids=~1 means no cluster ids; no weights or probabilities are passed here
#NOTE(review): svydesign() is expected to warn that it assumes equal probability when no weights are supplied - confirm this is intended
May15.svy.unweighted <- svydesign(ids=~1, data=May15)
#prints a description of the design
summary(May15.svy.unweighted)


#population dataframes (weights target this level)
#one data frame per margin: the first column holds the category codes of the survey
#variable, and Freq is the target share of each category multiplied by the number of rows in May15
#NOTE(review): the source of the target shares is not recorded in this file
gender.dist <- data.frame(Ggender = as.character(1:2),
                          Freq = c(0.4729167, 0.5270833) * nrow(May15))

age.dist <- data.frame(Gage2 = as.character(1:3),
                       Freq = c(0.09, 0.69, 0.22) * nrow(May15))

race.dist <- data.frame(Grace = as.character(1:4),
                        Freq = c(0.7374387, 0.1339847, 0.0841532, 0.0444234) * nrow(May15))




#rakes the unweighted design to the population targets
#design = the unweighted survey design object created above with svydesign()
#sample.margins = formulas naming the survey variables to rake on
#population.margins = the target data frames (gender.dist, race.dist), listed in the same order as sample.margins
#NOTE(review): age.dist is defined above but is not passed to rake(), so the weights
#adjust for gender and race only - confirm that leaving age out is intentional
#This is the weighted dataset
May15.svy.rake <- rake(design = May15.svy.unweighted,
                       sample.margins = list(~Ggender, ~Grace),
                       population.margins = list(gender.dist, race.dist))

#prints a description of the raked design, which carries the new weights
summary(May15.svy.rake)

#Compares distributions: unweighted sample shares first, then shares under the raked weights
#gender (one of the raking margins)
prop.table(table(May15$Ggender))
prop.table(svytable(~Ggender, design = May15.svy.rake))

#Compares distributions: age
#Gage2 is not one of the margins passed to rake(), so this shows how the gender/race weights shift the age distribution
prop.table(table(May15$Gage2))
prop.table(svytable(~Gage2, design = May15.svy.rake))

#Compares distributions: race (one of the raking margins)
prop.table(table(May15$Grace))
prop.table(svytable(~Grace, design = May15.svy.rake))



#shows how much people are weighted. We don't want a big MAX number because that would mean one person is being weighted up a lot
#no lower than .5 or higher than 2.5
#NOTE(review): unclear whether the .5-2.5 range above is a target or the observed range - confirm against the printed summary
summary(weights(May15.svy.rake))

#weighted sample sizes by treatment arm (tabulated from the raked design, not raw row counts)
svytable(~Gtreatment, design = May15.svy.rake)
